{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44cb4be1-fe94-4f24-bee5-d590045c36ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6678b895-df28-4cc2-b2ec-0b33fe821b45",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 加载数据集\n",
    "# 如果这里是很大的数据源，这里可以著名从那里下载即可，比如您可以下载这个 data.csv 从 http://xxxx.com/data.csv 处下载\n",
    "data = pd.read_csv('data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c7c861f-68c8-4ac6-a755-0629b682a1ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 打印数据集\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0314df46",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 加载 y 和 x\n",
    "# 此处 y 是毕业的成绩 x1 为 sat 的称\n",
    "# 最终的公式 y=f(x)\n",
    "y = data['GPA']\n",
    "x1 = data['SAT']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9217439f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 打印 y \n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a334c0de",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 打印 x\n",
    "x1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dbf753a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 绘制训练集的座标点\n",
    "plt.scatter(x1,y)\n",
    "plt.xlabel('SAT', fontsize = 20)\n",
    "plt.ylabel('GPA', fontsize = 20)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "886304a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查看数据集中的各项关键值\n",
    "data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d64ca6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 调用 python 来使用最小2乘法来得到 slop 值\n",
    "x = sm.add_constant(x1)\n",
    "results = sm.OLS(y,x).fit()\n",
    "results.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "826170b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 绘制回归线\n",
    "plt.scatter(x1,y)\n",
    "yhat = 0.0017*x1 + 0.275\n",
    "fig = plt.plot(x1,yhat, lw=4, c='orange', label = 'regression line')\n",
    "plt.xlabel('SAT', fontsize = 20)\n",
    "plt.ylabel('GPA', fontsize = 20)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4353737",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 定义一个预测函数\n",
    "def prediction(x):\n",
    "    print(0.0017 * x + 0.275)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "464bcb56",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 预测 2000 分的 SAT 的 GPA\n",
    "prediction(2000)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
